clear all

* Set your directory path here
local root "D:\Understanding_Bank_Payouts"

set more off
set varabbrev off, perm

local makefile yes

if "`makefile'" == "yes" {

* Number of analysts from IBES

use "`root'\Data\IBES\Number_of_analysts_19762015.dta" 

collapse (max) numest (firstnm) ticker oftic, by(cusip fpedats)
generate year = year(fpedats)
generate cusip6 = substr(cusip,1,6)
collapse (max) numest (firstnm) ticker oftic, by(cusip6 year)
keep year cusip6 numest
sort cusip6 year

save "TEMP_nr_of_analysts.dta", replace

* Lagged quarterly variables for insider trading

use "`root'\Data\Insider\allfirms_quarterly.dta"

gen helper = quarter + 135
format helper %tq
drop quarter
rename helper quarter
xtset instkey quarter, quarterly

foreach var of varlist antQ ansQ npr_countQ npr_volumeQ ///
pr_netbuyers_volumeQ pr_netsellers_volumeQ silenceQ net_volumeQ volume_PQ ///
volumeC_PQ volumeTOP_PQ volume_SQ volumeC_SQ volumeTOP_SQ  ///
volume_raw_PQ volume_raw_SQ value_raw_PQ value_raw_SQ {
	generate L`var' = L.`var'
	generate F`var' = F.`var'
	}

generate LpratioQ = Lvolume_raw_PQ / (Lvolume_raw_PQ + Lvolume_raw_SQ)
label variable LpratioQ "Volume purchased divided by total volume - quarterly lag"
generate LsratioQ = Lvolume_raw_SQ / (Lvolume_raw_PQ + Lvolume_raw_SQ)
label variable LsratioQ "Volume sold divided by total volume - quarterly lag"

generate FpratioQ = Fvolume_raw_PQ / (Fvolume_raw_PQ + Fvolume_raw_SQ)
label variable FpratioQ "Volume purchased divided by total volume - quarterly lead"
generate FsratioQ = Fvolume_raw_SQ / (Fvolume_raw_PQ + Fvolume_raw_SQ)
label variable FsratioQ "Volume sold divided by total volume - quarterly lead"

keep instkey quarter L* F*
compress
save temp_insider_trading_lags_quarterly.dta, replace
clear

* Lagged yearly variables for insider trading

use "`root'\Data\Insider\allfirms_yearly.dta"

xtset instkey year, yearly

foreach var of varlist antY ansY npr_countY npr_volumeY ///
pr_netbuyers_volumeY pr_netsellers_volumeY silenceY net_volumeY volume_PY ///
volumeC_PY volumeTOP_PY volume_SY volumeC_SY volumeTOP_SY  ///
volume_raw_PY volume_raw_SY value_raw_PY value_raw_SY {
	generate L`var' = L.`var'
	generate F`var' = F.`var'
	}

generate LpratioY = Lvolume_raw_PY / (Lvolume_raw_PY + Lvolume_raw_SY)
label variable LpratioY "Volume purchased divided by total volume - yearly lag"
generate LsratioY = Lvolume_raw_SY / (Lvolume_raw_PY + Lvolume_raw_SY)
label variable LsratioY "Volume sold divided by total volume - yearly lag"

generate FpratioY = Fvolume_raw_PY / (Fvolume_raw_PY + Fvolume_raw_SY)
label variable FpratioY "Volume purchased divided by total volume - yearly lead"
generate FsratioY = Fvolume_raw_SY / (Fvolume_raw_PY + Fvolume_raw_SY)
label variable FsratioY "Volume sold divided by total volume - yearly lead"

keep instkey year L* F*
compress
save temp_insider_trading_lags_yearly.dta, replace
clear

* Compensation until 2005

import excel "`root'\Data\Originals\SNL_executive_ownership_compensation\Executive_compensation_SNL_Peter_cleaned_19952006.xlsx", sheet("Sheet1") firstrow
generate salary_to_total = salary/totalcomp
generate bonus_to_total = bonus/totalcomp
generate ltcomp_to_total = longtermcomp/totalcomp
preserve
collapse (mean) salary bonus annualcomp longtermcomp totalcomp othercomp salary_to_total bonus_to_total ltcomp_to_total, by(instkey year)
save temp_compensation_to2005_averages.dta, replace
restore 
keep if exectitle =="President" | exectitle == "President & CEO" | exectitle == "Chairman & CEO" | exectitle == "Chairman & President" | exectitle == "Chairman & Pres." ///
	| exectitle == "Chairman, President & CEO" | exectitle == "Chief Executive Officer" | exectitle  == "Chief Exec. Officer" 
collapse (mean) salary bonus annualcomp longtermcomp totalcomp othercomp salary_to_total bonus_to_total ltcomp_to_total, by(instkey year)
foreach var of varlist salary bonus annualcomp longtermcomp totalcomp othercomp salary_to_total bonus_to_total ltcomp_to_total {
	rename `var' `var'P
	}
save temp_compensation_to2005_president.dta, replace
clear

* Compensation for 2006-2013

import excel "`root'\Data\Originals\SNL_executive_ownership_compensation\Executive_compensation_SNL_Peter_cleaned_20062013.xlsx", sheet("Sheet1") firstrow
destring otherannualcomp stockawards_fairvlaue optionawards_fairvalue nonequity_incentivecomp changeinpension totalcomp, replace force
generate salary_to_total = salary/totalcomp
generate bonus_to_total = bonus/totalcomp
generate rstock_to_total = stockawards_fairvlaue/totalcomp
generate options_to_total = optionawards_fairvalue/totalcomp
generate nonequityinc_to_total = nonequity_incentivecomp/totalcomp
generate chgpension_to_total = changeinpension/totalcomp
generate equitybased_to_total = rstock_to_total + options_to_total
preserve 
collapse (mean) salary_to_total bonus_to_total rstock_to_total options_to_total nonequityinc_to_total chgpension_to_total equitybased_to_total, by(instkey year)
save temp_compensation_2006to2013_averages.dta, replace
restore 
keep if exectitle =="President" | exectitle == "President & CEO" | exectitle == "Chairman & CEO" | exectitle == "Chairman & President" | exectitle == "Chairman & Pres." ///
	| exectitle == "Chairman, President & CEO" | exectitle == "Chief Executive Officer" | exectitle  == "Chief Exec. Officer" 
collapse (mean) salary_to_total bonus_to_total rstock_to_total options_to_total nonequityinc_to_total chgpension_to_total equitybased_to_total, by(instkey year)
foreach var of varlist salary_to_total bonus_to_total rstock_to_total options_to_total nonequityinc_to_total chgpension_to_total equitybased_to_total {
	rename `var' `var'P
	}
save temp_compensation_2006to2013_president.dta, replace
clear

* Insider ownership
import excel "`root'\Data\Originals\SNL_executive_ownership_compensation\Insider_ownership_SNL_Peter_cleaned.xlsx", sheet("Sheet1") firstrow
reshape long pctown_insiders_, i(instkey) j(quarter) string
generate year = substr(quarter,1,4)
generate calendarquarter = substr(quarter,-1,1)
destring year, force replace
destring calendarquarter, force replace
gen q =  yq(year, calendarquarter)
rename quarter qtext
rename q quarter
format quarter %tq
drop qtext bankname
rename pctown_insiders_ pctown_insidersSNL
destring pctown_insidersSNL, replace force
gen helper = pctown_insidersSNL/100
replace pctown_insidersSNL = helper
replace pctown_insidersSNL = . if helper > 1
drop helper
	preserve
drop year calendarquarter
save temp_insider_ownership_SNLquarterly.dta, replace
	restore 
keep if calendarquarter == 4
drop quarter
save temp_insider_ownership_SNLyearly.dta, replace
clear

import excel "`root'\Data\SNL_HMDA_2001to2013_cleanedPeter.xlsx", sheet("data") firstrow

rename KeyInstn instkey
rename HMDAAsOf year
rename HMDALoanValue loanvalue
label variable loanvalue "HMDA loan value"
rename HMDALoanCount loancount
label variable loancount "HMDA loan count"
rename HMDAApplicationCount appcount
label variable appcount "HMDA loan application count"
rename HMDAApplicationValue appvalue
label variable appvalue "HMDA loan application value"
drop shortname

save tempHMDA.dta, replace
clear

import excel "`root'\Data\fedfunds_Peter.xlsx", sheet("cleaned_Peter") firstrow

sort year quarter
by year: egen fedfunds_yavgqbegin = mean(fedfunds_qbegin)
by year: egen fedfunds_yavgqend = mean(fedfunds_qend)

label variable fedfunds_yavgqbegin "Yearly average Fed funds rate, based on beginning of q values"
label variable fedfunds_yavgqend "Yearly average Fed funds rate, based on end of q values"
label variable fedfunds_qend "Fed funds rate at the end of the quarter"
label variable fedfunds_qbegin "Fed funds rate at the beginning of the quarter"
label variable fedfunds_yend "Fed funds rate at the end of the year"
label variable fedfunds_ybegin "Fed funds rate at the beginning of the year"

drop quarter year
save tempfedrate.dta, replace

* Now clean the cross-sectional data from SNL

foreach var in cash_bal_duedepinst now_other cbs_tot_trans_acc tot_nontrans_acc_cbk ///
	odi_tot_trans_acc tot_nontrans_acc_other trans_acc nontrans_acc fedfunds_p fedfunds_s ///
	ffrepo_intexpense ffrepo_intrevenue {

	import excel "`root'\Data\Originals\SNL_Interbank_PeterMay2015\Y`var'.xls", cellrange(A3:AD5042) firstrow clear
	
	drop in 1
	drop A B C D
	rename E instkey
	destring instkey, replace force
	drop if instkey == .
 	
	rename  F    `var'2014		// Now rename all the other variables, one by one
	rename 	G	 `var'2013
	rename 	H	 `var'2012
	rename 	I	 `var'2011
	rename 	J	 `var'2010
	rename 	K	 `var'2009
	rename 	L	 `var'2008
	rename 	M	 `var'2007
	rename 	N	 `var'2006
	rename 	O	 `var'2005
	rename 	P	 `var'2004
	rename 	Q	 `var'2003
	rename 	R	 `var'2002
	rename 	S	 `var'2001
	rename 	T	 `var'2000
	rename 	U	 `var'1999
	rename 	V	 `var'1998
	rename 	W	 `var'1997
	rename 	X	 `var'1996
	rename 	Y	 `var'1995
	rename 	Z	 `var'1994
	rename 	AA	 `var'1993
	rename 	AB	 `var'1992
	rename 	AC	 `var'1991
	rename 	AD	 `var'1990

	reshape long `var', i(instkey) j(year)	// Reshape to panel format
	destring `var', replace force
	save `var'TEMP.dta, replace
	
	}

use cash_bal_duedepinstTEMP.dta, clear
generate helper = cash_bal_duedepinst	
replace cash_bal_duedepinst = 0 if helper == .
drop helper

foreach var in now_other cbs_tot_trans_acc tot_nontrans_acc_cbk ///
	odi_tot_trans_acc tot_nontrans_acc_other trans_acc nontrans_acc fedfunds_p fedfunds_s ///
	ffrepo_intexpense ffrepo_intrevenue {

	merge 1:1 instkey year using `var'TEMP.dta, nogenerate
	erase `var'TEMP.dta
	generate helper = `var' 
	replace `var' = 0 if helper == .		
	drop helper
	}

generate interbank_balance = cash_bal_duedepinst - (trans_acc + nontrans_acc)
generate fedfunds_balance = fedfunds_p - fedfunds_s

save tempinterbank.dta, replace
erase cash_bal_duedepinstTEMP.dta
clear
	
use "`root'\Data\CRSP_quarterly_data_calculated.dta"
rename cusip cusip8 								
save tempcrsp.dta, replace			

import excel "`root'\Data\Originals\SNL_Institutional_PeterMay2015\InstOwn_BHCs1999to2015.xls", ///  Institutional ownership
sheet("Institutional Ownership") cellrange(A1:BO659) firstrow clear
rename SNLID instkey
reshape long INSTSHARES, i(instkey) j(period) string	// Reshape to panel format
gen helper = substr(period,2,6)
drop period
rename helper period
save instshares.dta, replace

import excel "`root'\Data\Originals\SNL_Institutional_PeterMay2015\InstOwn_BHCs1999to2015.xls", ///
sheet("Shares Outstanding") cellrange(A1:BO659) firstrow clear
rename SNLID instkey
destring SHARESOUT*, replace force
reshape long SHARESOUT, i(instkey) j(period) string	// Reshape to panel format
gen helper = substr(period,2,6)
drop period
rename helper period
save sharesout.dta, replace
merge 1:1 instkey period using instshares.dta, assert(1 3) nogenerate
generate instown_perc = INSTSHARES/ SHARESOUT
gen helper = instown_perc
replace instown_perc = 1 if helper > 1 & helper != .	
sum instown_perc, detail
generate quarter = quarterly(period, "YQ", 2015)
gen q = substr(period,6,1)
generate year = substr(period,1,4)
destring q year, replace force
keep instkey year instown_perc
dups, t drop
drop _expand
dups instkey year, t drop
drop _expand

save instownSNL.dta, replace
erase sharesout.dta
erase instshares.dta

use "`root'\Data\TR_institutional\Holdings_summary_1980to2013.dta", clear
gen q = quarter(rdate) 
generate year = year(rdate)
keep if q == 4
rename cusip cusip8
sort cusip8 year
generate cusip6 = substr(cusip8,1,6)
collapse (mean) instown_perc instown_hhi instown maxinstown numinstowners instblockown numinstblockowners top10instown top5instown, by(cusip6 year)
save instownTR.dta, replace			// Institutional ownership from Thomson Reuters

use "`root'\Data\TR_institutional\Holdings_summary_1980to2013.dta", clear
gen q = quarter(rdate) 
gen year = year(rdate)
gen quarter = yq(year, q)
rename cusip cusip8
sort cusip8 year
generate cusip6 = substr(cusip8,1,6)
collapse (mean) instown_perc instown_hhi instown maxinstown numinstowners instblockown numinstblockowners top10instown top5instown, by(cusip6 quarter)
save instownTR_quarterly.dta, replace			// Institutional ownership from Thomson Reuters

use `panelfile', clear
drop _expand 
drop r
drop quarter										//Redefine the quarter variable to a tq date format
generate quarter = quarterly(period, "YQ", 2015)
format quarter %tq

g cusip8 = substr(cusip,1,8)
merge m:1 cusip8 quarter using tempcrsp.dta, update replace keep(1 3 4 5)
erase tempcrsp.dta
drop cusip8 _merge

drop cusip6 cusip2
g cusip6 = substr(cusip,1,6)

* Keep only banks
rename divpaidperordshare divpsh
summarize cpi if year == 2000
local i = r(mean)
display `i'
generate divpshR = divpsh*`i'/cpi
summarize divpsh divpshR

generate reppsh = commstockrep/commsharesout

* 1) Differentiate between payers, nonpayers and switchers
* First, who is a payer and who is a nonpayer
sort instkey quarter
by instkey: egen divdata_count = count(divpsh)
by instkey: egen tot_avg_div = mean(divpsh)
generate ever_payer = (tot_avg_div > 0)
replace ever_payer =. if tot_avg_div == .

* Which is the first quarter that the firm pays dividends?
by instkey: egen aaa_firstquarter_paydiv = min(quarter) if divpsh > 0 & divpsh != .
by instkey: egen firstquarter_paydiv = min(aaa_firstquarter_paydiv)
by instkey: egen aaa_lastquarter_paydiv = max(quarter) if divpsh > 0 & divpsh != .
by instkey: egen lastquarter_paydiv = min(aaa_lastquarter_paydiv)

sort instkey year quarter
by instkey year: egen avg_divyield = mean(divyield)
label variable avg_divyield "The average dividend yield of the firm this year"
generate divpayer = (avg_divyield > 0)
replace divpayer =. if avg_divyield == .
label variable divpayer "1 if the firm paid dividends this year"

generate paydivs = (divpsh > 0 & divpsh !=.)
generate missing_divdata = (divpsh ==.)
by instkey: egen nr_paydivs = total(paydivs)
by instkey: egen nr_misdivs = total(missing_divdata)
by instkey: egen nrobs = count(instkey)
generate alwayspayer = (nr_misdivs + nr_paydivs == nrobs)
replace alwayspayer = . if nr_misdivs == nrobs

* Once the firm has started paying, how many times a year does it pay?

generate nonmissing_divdata = (missing_divdata == 0)
br missing_divdata nonmissing_divdata
by instkey year: egen divdata_peryear = total(nonmissing_divdata)
generate divpay = (divpsh > 0)
replace divpay = . if divpsh == .
by instkey year: egen div_freq_yr = total(divpay) if quarter > firstquarter_paydiv & quarter < lastquarter_paydiv ///
& divdata_peryear == 4 /* count only the years where we have 4 quarters of dividend data */
by instkey: egen div_freq = mean(div_freq_yr) if quarter > firstquarter_paydiv & quarter < lastquarter_paydiv

preserve
dups instkey div_freq, t drop 
drop _expand
kdensity div_freq
histogram div_freq
estpost summarize div_freq, detail


gen quarterly_payer = (div_freq > 3 & div_freq != .)				// This is a dummy for firms that are quarterly payers
label variable quarterly_payer "Bank pays dividends each quarter"


sort instkey quarter
by instkey: generate reduce = (divpsh < divpsh[_n-1] & divpsh != 0)
by instkey: generate increase = (divpsh > divpsh[_n-1] & divpsh[_n-1] != 0)
by instkey: generate spike = (divpsh > divpsh[_n-1] & ///
divpsh[_n+1] == divpsh[_n-1] & divpsh[_n-1] != . & divpsh !=.)
by instkey: replace increase = . if _n == 1
by instkey: replace reduce = . if _n == 1
generate increase_clean = increase
by instkey: replace increase_clean = 0 if spike == 1
generate reduce_clean = reduce
by instkey: replace reduce_clean = 0 if spike[_n-1] == 1

sort instkey year period
order period instkey div* reduce* increase* spike

sort instkey year quarter
by instkey year: egen divpshY1 = total(divpsh)
label variable divpshY1 "Dividend per share - yearly, sum of quarterly" 
by instkey year: gen divpshY2 = divpsh[_N]*4
label variable divpshY2 "Dividend per share - yearly, 4*last quarter" 
by instkey year: egen divpshRY1 = total(divpshR)
label variable divpshY1 "Constant 2000 dividend per share - yearly, sum of quarterly" 
by instkey year: gen divpshRY2 = divpshR[_N]*4
label variable divpshY2 "Constant 2000 dividend per share - yearly, 4*last quarter" 

by instkey year: egen reppshY1 = total(reppsh), missing			
label variable divpshY1 "Repurchase per share - yearly, sum of quarterly" 
gen tpopshY1 = divpshY1 + reppshY1


by instkey: generate divsmooth = divpsh
by instkey: replace divsmooth = divpsh[_n-1] if spike == 1

by instkey year: egen divpershare_year = total(divpsh)
preserve
keep instkey year divpershare_year
dups instkey year divpershare_year, t drop
drop _expand
by instkey: generate divgrowth_year = divpershare_year/divpershare_year[_n-1] - 1
compress
tempfile growthfile
save `growthfile'
restore 
merge m:1 instkey year divpershare_year using `growthfile', assert(1 3)
drop _merge

winsor divgrowth_year, generate(divgrowth_yearW) p(0.01) high
drop divgrowth_year
rename divgrowth_yearW divgrowth_year

sort instkey year quarter

by instkey: egen aaa_divpershare_0708 = total(divpsh) if year == 2007 | year == 2008 
by instkey: egen divpershare_0708 = max(aaa_divpershare_0708)
by instkey: egen datapoints_0708 = count(aaa_divpershare_0708)

by instkey: egen aaa_divpershare_0506 = total(divpsh) if year == 2005 | year == 2006 
by instkey: egen divpershare_0506 = max(aaa_divpershare_0506)
by instkey: egen datapoints_0506 = count(aaa_divpershare_0506)

by instkey: generate divchange_0506_0708 = divpershare_0708 - divpershare_0506

generate div_increase = (divgrowth_year > 0)
generate div_constant = (divgrowth_year == 0)
generate div_decrease = (divgrowth_year < 0)


local ylist div_increase div_constant div_decrease
generate inc1_const0 = div_increase
replace inc1_const0 = . if div_decrease == 1
generate dec1_inc0 = div_decrease
replace dec1_inc0 = . if div_constant == 1
generate const1_dec0 = div_constant
replace const1_dec0 = . if div_increase == 1

local zlist inc1_const0 const1_dec0 dec1_inc0

* Generate the covariates 

generate int_to_nonint = ii/tot_nii
label variable int_to_nonint "ratio of interest to noninterest income"

generate nonint_neg = (tot_nii < 0)
label variable nonint_neg "1 if quarterly noninterest income is negative"

foreach var of varlist cash_equ taccass afssec htmsec {			// Replace missing values with zeros before summing
generate `var'NM = `var'
replace `var'NM = 0 if `var' == .
}

generate liq = cash_equNM + taccassNM + afssecNM +  htmsecNM
generate depta = totdep / totassets			// Deposits to total assets
label variable depta "Deposits to total assets"

foreach var in roaa roae marketcap totequ totassets retearnings totrevenue ///
coretierone coretieroner tierone tieroneratio ///
insdivstakshares insdivstaktotper toptenperown flo publicflo instownership noinstinvestors ///
rwcapital rwa divyield totdebt comdivdeclsh regdivdecl commsharesout commstockrep prefstockrep ///
netprofit int_to_nonint rwcapitalratio divpshY1 divpshY2 liq stborr repos totliab depta {
winsor `var', generate(`var'W) p(0.01)
label variable `var'W "`var' winsorized at the 1%"
}

generate mtb = marketcap/totequ
label variable mtb "market-to-book calculated from raw data"
generate mtbW = marketcapW/totequW
label variable mtbW "market-to-book calculated from winsorized data"
generate btm = 1/mtb
generate btmW = 1/mtbW
generate growth1W = (totassetsW-totequW+marketcapW)/totequW
label variable growth1W "growth opportunities based on Denis&Osobov, from winsorized data"
sort instkey quarter
bys instkey: gen lag_totassetsW = totassetsW[_n-1]
*This would be quarter-on-quarter growth. Put year-on-year instead, because that's what we need for the regressions
generate growth2W = (totassetsW-lag_totassetsW)/lag_totassetsW
label variable growth2W "% quarterly asset growth, based on winsorized data"
generate earned_eqW = retearningsW/totequW
label variable earned_eqW "earned equityb ased on Denis&Osobov, from winsorized data"
generate lev_dtc = totdebtW/(totdebtW+totequW)
label variable lev_dtc "leverage as defined by Welch: total debt over total capital (debt+equity)"
generate lev_dta = totdebtW/totassetsW
label variable lev_dta "leverage defined as debt/assets"
generate lev_dte = totdebtW/totequW
label variable lev_dte "leverage defined as debt/equity"
generate lev_fps = (totassetsW - totequW + marketcapW)/marketcapW
label variable lev_fps "leverage as defined by Fahlenbrach et al. JF"
generate lev_mkt = (totdebtW)/(totdebtW+marketcapW)
label variable lev_mkt "Market value of leverage"
generate npl_to_net = npl/net_lns
label variable npl_to_net "Nonperforming loans to net loans"
generate npl_to_grs = npl/gross_lns
label variable npl_to_grs "Nonperforming loans to gross loans"

generate evol_20q = .
forvalues q=20/74 {
generate ingroup = (quarter <= `q' & quarter >= `q'-20)
by instkey: egen temp_eavg_20q = mean(netprofit) if ingroup == 1
by instkey: egen temp_evol_20q = sd(netprofit/temp_eavg_20q) if ingroup == 1
by instkey: replace evol_20q = temp_evol_20q[_n-1] if _n == `q'+1
drop ingroup temp_evol_20q temp_eavg_20q
} 

local xlist roaa roae roaaW roaeW growth1W mtb btm mtbW btmW totassets totassetsW earned_eqW marketcap totequ totequW retearnings totrevenue ///
coretierone coretieroner tierone tieroneratioW totdep ///
insdivstakshares insdivstaktotper toptenperown flo publicflo instownership noinstinvestors ///
rwcapital rwa divyield divyieldW gross_lns net_lns  ///
netprofit vol_year lev_dtc lev_dta lev_dte int_to_nonintW comdivdeclsh regdivdecl commsharesout commstockrep prefstockrep rwcapitalratio ///
rwcapitalratioW reppsh liq liqW stborr repos totliab stborrW reposW totliabW depta npl_to_net npl_to_grs llr_div_grosslns

* generate yearly variables
foreach var of local xlist {
by instkey year: egen avg_`var'Y = mean(`var')
by instkey year: gen `var'4 = `var'[4]  	// The 4th quarter value of the variable in each year
}
generate nonint_negY = (avg_int_to_nonintWY < 0)
label variable nonint_negY "1 if yearly noninterest income is negative"

foreach var in divdivbookvalue divsmooth divpayoutratio {
by instkey year: egen avg_`var'Y = mean(`var')
}

***************************

generate divpayoutratioYALT = avg_comdivdeclshY*avg_commsharesoutY/avg_netprofitY
generate repporatio_comYALT = avg_commstockrepY/avg_netprofitY
label variable repporatio_comYALT "repurchase payout ratio based on repurchases of common stock"
generate repporatio_totYALT = (avg_commstockrepY+avg_prefstockrepY)/avg_netprofitY
label variable repporatio_totYALT "repurchase payout ratio based on repurchases of all classes of stock"
generate tporatio_comYALT = divpayoutratioYALT + repporatio_comYALT
label variable tporatio_comYALT "total payout ratio based on repurchases of common stock"
generate tporatio_totYALT = divpayoutratioYALT + repporatio_totYALT
label variable tporatio_comYALT "total payout ratio based on repurchases of all classes of stock"

summarize divpayoutratio* avg_divpayoutratioY repporatio* tporatio*
foreach var in divpayoutratioYALT repporatio_comYALT repporatio_totYALT tporatio_comYALT tporatio_totYALT {
	winsor `var', generate(helper) p(0.01)
	drop `var'
	rename helper `var'
}
corr divpayoutratio avg_divpayoutratioY

foreach var in divpayoutratio avg_divpayoutratioY {
	generate helper = `var'/100
	drop `var'
	rename helper `var'
}

foreach perf in cumloss cumr {
	by instkey: generate aaa_`perf' = `perf' if period == "2008q4"
	by instkey: egen `perf'_0708 = max(aaa_`perf')
}

sort instkey quarter
by instkey: generate aaa = (1+cumr_0708)*(1+ret_year) - 1 if year == 2009
by instkey: egen cumr_0709 = max(aaa)
drop aaa
label variable cumr_0709 "Cumulative returns during 2007-2009"
generate bbb = (1+cumr_0709)/((1+r)*(1+r[_n-1])) - 1 if quarter == tq(2007q2)
by instkey: egen cumr_07Q309 = max(bbb)
label variable cumr_07Q309 "Cumulative returns during 2007Q3-2009"
drop bbb

by instkey: generate jjj = (1+bhret_year)*(1+bhret_year[_n+1])-1 if quarter == tq(2008q4)
by instkey: egen cumr_0809 = max(jjj)
drop jjj
label variable cumr_0809 "Cumulative returns during 2008-2009"

forvalues y = 6/9 {
	by instkey: generate iii = bhret_year if year == 2000+`y'
	by instkey: egen cumr_0`y' = max(iii)
	drop iii
	label variable cumr_0`y' "Cumulative returns during 200`y'"
	}

sum cumr*
	
by instkey: generate helper = (1+r)*(1+r[_n-1])*(1+r[_n-2])*(1+r[_n-3])*(1+r[_n-4])*(1+r[_n-5]) - 1 if quarter == tq(2008q4)
by instkey: egen cumr_07Q308 = max(helper)
drop helper
label variable cumr_07Q308 "Cumulative returns during 2007Q3-2008"

by instkey: generate helper = (1+r)*(1+r[_n-1]) - 1 if quarter == tq(2008q4)
by instkey: egen cumr_08Q3Q4 = max(helper)
drop helper
label variable cumr_08Q3Q4 "Cumulative returns during 2008q3-q4, the peak of the crisis"

generate nptemp = netprofit
replace nptemp = 0 if netprofit == .

generate ccc = nptemp + ///
nptemp[_n+1] + ///
nptemp[_n+2] + ///
nptemp[_n+3] + ///
nptemp[_n+4] + ///
nptemp[_n+5] + ///
nptemp[_n+6] + ///
nptemp[_n+7] + ///
nptemp[_n+8] + ///
nptemp[_n+9] + ///
nptemp[_n+10] + ///
nptemp[_n+11] if quarter == tq(2007q1)
by instkey: egen cumloss_0709 = max(ccc)
drop ccc
label variable cumloss_0709 "Cumulative losses during 2007-2009"

generate ccc = nptemp + ///
nptemp[_n+1] + ///
nptemp[_n+2] + ///
nptemp[_n+3] + ///
nptemp[_n+4] + ///
nptemp[_n+5] + ///
nptemp[_n+6] + ///
nptemp[_n+7] + ///
nptemp[_n+8] + ///
nptemp[_n+9] if quarter == tq(2007q3)
by instkey: egen cumloss_07Q309 = max(ccc)
drop ccc
label variable cumloss_07Q309 "Cumulative losses during 2007Q3-2009"

generate helper = nptemp + ///
nptemp[_n+1] + ///
nptemp[_n+2] + ///
nptemp[_n+3] + ///
nptemp[_n+4] + ///
nptemp[_n+5] if quarter == tq(2007q3)
by instkey: egen cumloss_07Q308 = max(helper)
drop helper
label variable cumloss_07Q308 "Cumulative losses during 2007Q3-2008"

by instkey: egen ccc = total(nptemp) if year == 2008 | year == 2009
by instkey: egen cumloss_0809 = max(ccc)
drop ccc
label variable cumloss_0809 "Cumulative losses during 2008-2009"

by instkey: egen ccc = total(nptemp) if year == 2009
by instkey: egen cumloss_09 = max(ccc)
drop ccc
label variable cumloss_09 "Cumulative losses during 2009"

by instkey: egen ccc = total(nptemp) if year == 2008
by instkey: egen cumloss_08 = max(ccc)
drop ccc
label variable cumloss_08 "Cumulative losses during 2008"

generate helper = nptemp + ///
nptemp[_n+1] if quarter == tq(2008q4)
by instkey: egen cumloss_08Q3Q4 = max(helper)
drop helper
label variable cumloss_08Q3Q4 "Cumulative losses during 2008q3-q4, the peak of the crisis"

order r cumr cumr* ret_year bhret_year loss cumloss* netprofit

by instkey: egen uuu = mean(totassets) if year >= 2006 & year <= 2009
by instkey: egen avg_assets = max(uuu)
drop uuu

foreach profit in cumloss_07Q308 cumloss_07Q309 cumloss_0809 cumloss_09 cumloss_08 cumloss_08Q3Q4 {
	generate ttt = `profit'/avg_assets
	rename `profit' RAW`profit'
	rename ttt `profit'
}

summarize cumr* cumloss*

drop if cumr_0708 > 5

foreach perf of varlist cumr_07Q308 cumr_07Q309 cumr_0809 cumr_09 cumloss_07Q308 cumloss_07Q309 cumloss_0809 cumloss_09 cumloss_08 cumloss_08Q3Q4 {
	winsor `perf', generate(helper) p(0.01)
	replace `perf' = helper
	drop helper
	}


local xyearlist avg_roaaY avg_roaeY avg_roaaWY avg_roaeWY avg_growth1WY ///
avg_btmY avg_mtbY avg_btmWY avg_mtbWY ///
avg_totassetsY avg_totassetsWY  avg_earned_eqWY avg_marketcapY avg_totequY avg_retearningsY avg_totrevenueY ///
avg_coretieroneY avg_coretieronerY avg_tieroneY avg_tieroneratioWY avg_insdivstaksharesY avg_insdivstaktotperY ///
avg_toptenperownY avg_floY avg_publicfloY avg_instownershipY avg_noinstinvestorsY avg_rwcapitalY avg_rwaY ///
avg_netprofitY avg_vol_yearY avg_gross_lnsY avg_net_lnsY ///
avg_divyieldY avg_lev_dtcY avg_lev_dtaY avg_lev_dteY avg_int_to_nonintWY avg_rwcapitalratioWY nonint_negY

g lnmcapY = ln(avg_marketcapY)

* generate lags
* first, quarterly lags
foreach var of local xlist {
by instkey: generate L_`var'_q = `var'[_n-1]
}
* now, yearly lags -

preserve
keep instkey year `xyearlist'
dups instkey year `xyearlist', t drop
drop _expand
sort instkey year

by instkey: generate roaaW_5yravg = (avg_roaaWY[_n-5] + avg_roaaWY[_n-4] + avg_roaaWY[_n-3] + avg_roaaWY[_n-2] + avg_roaaWY[_n-1])/5
by instkey: generate divyieldW_5yravg = (avg_divyieldY[_n-5] + avg_divyieldY[_n-4] + ///
avg_divyieldY[_n-3] + avg_divyieldY[_n-2] + avg_divyieldY[_n-1])/5

by instkey: generate gloangrowthY = avg_gross_lnsY/avg_gross_lnsY[_n-1] - 1
by instkey: generate nloangrowthY = avg_net_lnsY/avg_net_lnsY[_n-1] - 1
by instkey: generate growth2WY = (avg_totassetsWY-avg_totassetsWY[_n-1])/avg_totassetsWY[_n-1]
generate btm_X_roa = avg_btmWY*avg_roaaWY

generate evol_5y = .
forvalues yr=1998/2013 {
generate ingroup = (year <= `yr' & year >= `yr'-5)
by instkey: egen temp_eavg_5y = mean(avg_netprofitY) if ingroup == 1
by instkey: egen temp_evol_5y = sd(avg_netprofitY/temp_eavg_5y) if ingroup == 1
by instkey: replace evol_5y = temp_evol_5y[_n-1] if year == `yr'+1
order instkey year ingroup temp_eavg_5y temp_evol_5y evol_5y
drop ingroup temp_evol_5y temp_eavg_5y
} 
by instkey: generate L_evol_5y = evol_5y[_n-1]

foreach var of local xyearlist {
by instkey: generate L_`var' = `var'[_n-1]
}
foreach var in btm_X_roa roaaW_5yravg divyieldW_5yravg gloangrowthY nloangrowthY {
by instkey: generate L_`var' = `var'[_n-1]
}


tempfile yearlags
save `yearlags', replace
restore
***
merge m:1 instkey year using `yearlags', assert(3) 
drop _merge

* Create a dummy for TARP

label variable Now_TARP "1 if the firm has TARP money outstanding this quarter"
sort instkey year quarter
by instkey year: egen helper = max(Now_TARP)
rename helper tarp_yr
label variable tarp_yr "1 if the firm has TARP money outstanding this year"

sort instkey year
by instkey: egen tarpever = max(tarp_yr)

preserve										// Add return data using stock price information from SNL
keep instkey quarter year shareprice
keep if quarter == tq(2007q2) | quarter == tq(2008q4)
sort instkey quarter
by instkey: generate cumr_alternative = shareprice/shareprice[_n-1] - 1
keep if quarter == tq(2008q4)
keep instkey cumr_alternative
label variable cumr_alternative "Cumulative returns during July 2007-2008, based on data from SNL"
save SNL_returnfile.dta, replace
restore

merge m:1 instkey using SNL_returnfile.dta, assert(1 3) nogenerate
gen helper = cumr_07Q308
replace cumr_07Q308 = cumr_alternative if helper == .
drop helper 
erase SNL_returnfile.dta

sort instkey quarter											// Create quarterly returns based on SNL data
by instkey: generate returnQ = shareprice/shareprice[_n-1] - 1
label variable returnQ "Quarterly return based on SNL data" 

preserve
generate helper = quarter(dofq(quarter))						// This gives the integer number of the quarter within the year
keep if helper == 4 											// Keep only the last quarter for each firm-year
sort instkey year
by instkey: generate returnY = shareprice/shareprice[_n-1] - 1
label variable returnY "Yearly return based on SNL data"
keep instkey year returnY
save SNL_returnfile_yearly.dta, replace
restore 

merge m:1 instkey year using SNL_returnfile_yearly.dta, assert(1 3) nogenerate
erase SNL_returnfile_yearly.dta

merge m:1 instkey using "`root'\Data\TARP\TARP_repayment_times_small.dta", keep(1 3) nogenerate

merge m:1 period using tempfedrate.dta, keep(1 3) nogenerate
erase tempfedrate.dta
merge m:1 instkey year using tempinterbank.dta, keep(1 3) nogenerate
erase tempinterbank.dta
merge m:1 instkey year using tempHMDA.dta, keep(1 3) nogenerate
erase tempHMDA.dta

gen tpopsh = divpsh + reppsh
label variable tpopsh "Total payout per share"
gen logassets = ln(totassetsW) 
label variable logassets "Log total assets"
gen liqratioW = liqW / totassetsW
label variable liqratioW "Liquidity ratio"
gen rete = retearnings/avg_totequY
label variable rete "Retained earnings to total equity"
generate netintexp_ffrepo = (ffrepo_intexpense - ffrepo_intrevenue)*1000 / totassetsW // Total assets are in dollars while the repos are in 000 dollars	
generate netintexp_ffrepo_1000 = netintexp_ffrepo * 1000
drop netintexp_ffrepo
rename netintexp_ffrepo_1000 netintexp_ffrepo 
label variable netintexp_ffrepo "Net interest expense (gross expense - gross revenue) on fed funds and repos, divided by total assets"
gen divdum1Q = (divpsh > 0)
replace divdum1Q = . if divpsh == .
label variable divdum1Q "Dividend payer - quarterly"
gen tpodum1Q = (tpopsh > 0)
replace tpodum1Q = . if tpopsh == .
label variable tpodum1Q "Total payout dummy - quarterly"
gen repdum1Q = (reppsh > 0)
replace repdum1Q =. if reppsh == .
label variable repdum1Q "Repurchase dummy - quarterly"
generate repyield = reppsh * 100 / shareprice
label variable repyield "Repurchase yield (quarterly)"
generate tpoyield = repyield + divyield
label variable tpoyield "Total payout yield (quarterly)"

bys instkey: egen temp = max(divdum1Q)					// Generate a dummy for never-payers
gen neverpayer = (temp == 0) 
drop temp

sort instkey year period
foreach var of varlist netintexp_ffrepo {
by instkey year: egen avg_`var'Y = mean(`var')
by instkey year: gen `var'4 = `var'[4]  		// The 4th quarter value of the variable in each year
}

foreach var of varlist stborrW reposW { 		// Create interim variables with the missing values set to 0
	generate `var'NM = `var'
	replace `var'NM = 0 if `var' == .
	}

generate stratio = (stborrWNM + reposWNM)/totliabW
label variable stratio "Short term liabilities to total liabilities, quarterly"

drop BBB*

merge 1:1 cusip6 quarter using instownTR_quarterly.dta, keep(1 3) nogenerate
erase instownTR_quarterly.dta
label variable instown_hhi "Ownership Concentration - Herfindahl-Hirschman Index"
label variable instown_perc "Total Inst. Ownership, Percent of Shares Outstanding"

merge 1:1 instkey quarter using  "`root'\Data\Issuance\issuance_quarterly.dta", keep(1 3)
foreach var of varlist grossamtissued_D_non grossamtissued_CE_non grossamtissued_PE_non grossamtissued_D_tarp grossamtissued_CE_tarp grossamtissued_PE_tarp ///
shsissued_D_non shsissued_CE_non shsissued_PE_non shsissued_D_tarp shsissued_CE_tarp shsissued_PE_tarp ///
nrissues_D_non nrissues_CE_non nrissues_PE_non nrissues_D_tarp nrissues_CE_tarp nrissues_PE_tarp {
	replace `var' = 0 if _merge == 1		// If there is no information in the issuance file, issuance that quarter was zero
}
drop _merge

merge 1:1 cusip6 quarter using "`root'\Data\Insider\ownership_imputed_final.dta", keep(1 3) nogenerate
generate pctown_insidersTR_Q = sharesheld_insidersQ / commsharesout
label variable pctown_insidersTR_Q "% of shares held by insiders, excluding blockholders - from Thomson Reuters - quarterly" 
generate helper = pctown_insidersTR_Q 
replace pctown_insidersTR_Q = . if helper > 1
drop helper

merge 1:1 instkey quarter using "temp_insider_ownership_SNLquarterly.dta", keep(1 3) nogenerate
rename pctown_insidersSNL pctown_insidersSNL_Q
erase temp_insider_ownership_SNLquarterly.dta

xtset instkey quarter, quarterly
generate divgrowthQ = divpsh/L.divpsh - 1
label variable divgrowthQ "Quarterly dividend growth"
generate tpogrowthQ = tpopsh/L.tpopsh - 1
label variable tpogrowthQ "Quarterly growth in total payout"
winsor divgrowthQ, generate(divgrowthQW) p(0.01)
winsor tpogrowthQ, generate(tpogrowthQW) p(0.01)

generate neg_divgrQ = (divgrowthQ < 0 & divgrowthQ != .)			// Additional specifications to look at positive vs negative tpo growth
generate pos_divgrQ = (divgrowthQ > 0 & divgrowthQ != .)			// Additional specifications to look at positive vs negative tpo growth
replace neg_divgrQ = . if divgrowthQ == .
replace pos_divgrQ = . if divgrowthQ == .

generate neg_tpogrQ = (tpogrowthQ < 0 & tpogrowthQ != .)			// Additional specifications to look at positive vs negative tpo growth
generate pos_tpogrQ = (tpogrowthQ > 0 & tpogrowthQ != .)			// Additional specifications to look at positive vs negative tpo growth
replace neg_tpogrQ = . if tpogrowthQ == .
replace pos_tpogrQ = . if tpogrowthQ == .

rename divdivbookvalue divbook
rename Now_TARP tarp_q

generate pratioQ = volume_raw_PQ / (volume_raw_PQ + volume_raw_SQ)
label variable pratioQ "Volume purchased divided by total volume (purchased + sold)"
generate sratioQ = volume_raw_SQ / (volume_raw_PQ + volume_raw_SQ)
label variable sratioQ "Volume sold divided by total volume (purchased + sold)"

foreach var of varlist antQ ansQ npr_countQ npr_volumeQ ///
pr_netbuyers_volumeQ pr_netsellers_volumeQ silenceQ net_volumeQ volume_PQ ///
volumeC_PQ volumeTOP_PQ volume_SQ volumeC_SQ volumeTOP_SQ pratioQ sratioQ ///
volume_raw_PQ volume_raw_SQ value_raw_PQ value_raw_SQ {

	by instkey: center `var'
	label variable c_`var' "`var', demeaned for firm FE"
	
	by instkey: egen auxAVG_`var' = mean(`var') if year < 2007		// This line produces the average only for observations prior to 2007
	by instkey: egen AVG_`var' = max(auxAVG_`var')					// A new variable that contains the firm-level average for all observations	
	generate x_`var' = `var' - AVG_`var'
	drop auxAVG_`var' /* AVG_`var' */
	label variable x_`var' "`var', subtracting the pre-crisis average"
	}

drop if divyield == . 	// Drop observations with stale/missing data

gen aaa = roaaW			// Generate a temporary version of roa that treats missing observations as zero
replace roaaW = 0 if aaa == .

generate avgroaW8q = (L.roaaW + L2.roaaW + L3.roaaW + L4.roaaW + L5.roaaW + L6.roaaW + L7.roaaW + L8.roaaW)/8	// Average ROA in the past 8 quarters
generate earningsvol8q = sqrt((1/8)*(L.roaaW - avgroaW8q)^2 + (L2.roaaW - avgroaW8q)^2 + (L3.roaaW - avgroaW8q)^2 + (L4.roaaW - avgroaW8q)^2 + ///
	(L5.roaaW - avgroaW8q)^2 + (L6.roaaW - avgroaW8q)^2 + (L7.roaaW - avgroaW8q)^2 + (L8.roaaW - avgroaW8q)^2)
generate avgroaW4q = (L.roaaW + L2.roaaW + L3.roaaW + L4.roaaW)/4	// Average ROA in the past 8 quarters
generate earningsvol4q = sqrt((1/4)*(L.roaaW - avgroaW4q)^2 + (L2.roaaW - avgroaW4q)^2 + (L3.roaaW - avgroaW4q)^2 + (L4.roaaW - avgroaW4q)^2)
generate earningsvol = earningsvol8q
replace earningsvol = earningsvol4q if earningsvol8q == . // Replace earnings volatility with a shorter version in cases where it is missing
label variable earningsvol "Earnings volatility based on the past 8 quarters, or 4 quarters, if 8 isn't available"

drop roaaW
rename aaa roaaW	// Change back the variable to have the missing values again

merge 1:1 instkey quarter using temp_insider_trading_lags_quarterly.dta, keep(1 3) nogenerate
erase temp_insider_trading_lags_quarterly.dta

foreach var of varlist LantQ LansQ Lnpr_countQ Lnpr_volumeQ ///
Lpr_netbuyers_volumeQ Lpr_netsellers_volumeQ LsilenceQ Lnet_volumeQ Lvolume_PQ ///
LvolumeC_PQ LvolumeTOP_PQ Lvolume_SQ LvolumeC_SQ LvolumeTOP_SQ LpratioQ LsratioQ ///
Lvolume_raw_PQ Lvolume_raw_SQ Lvalue_raw_PQ Lvalue_raw_SQ ///
FantQ FansQ Fnpr_countQ Fnpr_volumeQ ///
Fpr_netbuyers_volumeQ Fpr_netsellers_volumeQ FsilenceQ Fnet_volumeQ Fvolume_PQ ///
FvolumeC_PQ FvolumeTOP_PQ Fvolume_SQ FvolumeC_SQ FvolumeTOP_SQ FpratioQ FsratioQ ///
Fvolume_raw_PQ Fvolume_raw_SQ Fvalue_raw_PQ Fvalue_raw_SQ ///
{

	by instkey: center `var'
	label variable c_`var' "`var', demeaned for firm FE"
	
	by instkey: egen auxAVG_`var' = mean(`var') if year < 2007		// This line produces the average only for observations prior to 2007
	by instkey: egen AVG_`var' = max(auxAVG_`var')					// A new variable that contains the firm-level average for all observations	
	generate x_`var' = `var' - AVG_`var'
	drop auxAVG_`var' /* AVG_`var' */
	label variable x_`var' "`var', subtracting the pre-crisis average"
	}

merge m:1 cusip6 year using "TEMP_nr_of_analysts.dta", keep(1 3) nogenerate
capture erase TEMP_nr_of_analysts.dta
	
save `finalfile_quarterly', replace

 /* IF MAKEFILE == YES */  }

use `finalfile_quarterly', clear

bys instkey year: keep if _n == _N

sort instkey year
by instkey: generate divgrowthY1 = (divpshY1-divpshY1[_n-1])/divpshY1[_n-1]
label variable divgrowthY1 "yearly dividend growth based on total dividends"
by instkey: generate divgrowthY2 = (divpshY2-divpshY2[_n-1])/divpshY2[_n-1]
label variable divgrowthY2 "yearly dividend growth based on (divs in last quarter)*4"
by instkey: generate omitY1 = (divpshY1 == 0 & divpshY1[_n-1] != 0 & divpshY1[_n-1] != . & divpshY1[_n-2] != 0 & divpshY1[_n-2] != .)
by instkey: generate initiateY1 = (divpshY1 > 0 & divpshY1 !=. & divpshY1[_n-1] == 0 & divpshY1[_n-2] == 0) 
generate decreaseY1 = (divgrowthY1 < 0)
generate increaseY1 = (divgrowthY1 > 0 & divgrowthY1 != .)
generate decreaseY2 = (divgrowthY2 < 0)
generate increaseY2 = (divgrowthY2 > 0 & divgrowthY2 != .)
generate divdum1 = 0
replace divdum1 = 1 if divpshY1 > 0
replace divdum1 = . if divpshY1 == .
generate divdum2 = 0
replace divdum2 = 1 if divpshY2 > 0
replace divdum2 = . if divpshY2 == .
generate repdum1 = 0
replace repdum1 = 1 if avg_commstockrepY > 0
replace repdum1 = . if avg_commstockrepY == .
generate tpodum1 = (tpopshY1 > 0)
replace tpodum1 = . if tpopshY1 == .

by instkey: generate repgrowthY1 = (reppshY1-reppshY1[_n-1])/reppshY1[_n-1]					// Repurchase and total payout yield
label variable repgrowthY1 "yearly repurchase growth based on total repurchases"
by instkey: generate tpogrowthY1 = (tpopshY1-tpopshY1[_n-1])/tpopshY1[_n-1]
label variable tpogrowthY1 "yearly tpo growth based on total tpo"
generate avg_repyieldY = reppshY1 * 100 / shareprice
label variable avg_repyieldY "Repurchase yield (yearly)"
generate avg_tpoyieldY = avg_repyieldY + avg_divyieldY
label variable avg_tpoyieldY "Total payout yield (yearly)"

by instkey: generate dygrowthY1 = (avg_divyieldY-avg_divyieldY[_n-1])/avg_divyieldY[_n-1]			// Growth in dividend and tpo yield
label variable dygrowthY1 "growth in dividend yield" 
by instkey: generate rygrowthY1 = (avg_repyieldY-avg_repyieldY[_n-1])/avg_repyieldY[_n-1]
label variable rygrowthY1 "growth in repurchase yield" 
by instkey: generate tygrowthY1 = (avg_tpoyieldY-avg_tpoyieldY[_n-1])/avg_tpoyieldY[_n-1]
label variable tygrowthY1 "growth in total payout yield" 

by instkey: replace dygrowthY1 = 1 if avg_divyieldY > 0 & avg_divyieldY != . & avg_divyieldY[_n-1] == 0
by instkey: replace dygrowthY1 = 0 if avg_divyieldY == 0 & avg_divyieldY[_n-1] == 0
by instkey: replace rygrowthY1 = 1 if avg_repyieldY > 0 & avg_repyieldY != . & avg_repyieldY[_n-1] == 0
by instkey: replace rygrowthY1 = 0 if avg_repyieldY == 0 & avg_repyieldY[_n-1] == 0
by instkey: replace tygrowthY1 = 1 if avg_tpoyieldY > 0 & avg_tpoyieldY != . & avg_tpoyieldY[_n-1] == 0
by instkey: replace tygrowthY1 = 0 if avg_tpoyieldY == 0 & avg_tpoyieldY[_n-1] == 0

by instkey: generate dbgrowthY1 = (avg_divdivbookvalueY-avg_divdivbookvalueY[_n-1])/avg_divdivbookvalueY[_n-1]		// Growth in dividends to book value
label variable dbgrowthY1 "growth in dividends to book equity" 
by instkey: replace dbgrowthY1 = 1 if avg_divdivbookvalueY > 0 & avg_divdivbookvalueY != . & avg_divdivbookvalueY[_n-1] == 0
by instkey: replace dbgrowthY1 = 0 if avg_divdivbookvalueY == 0 & avg_divdivbookvalueY[_n-1] == 0

sort instkey year
generate reteY = avg_retearningsY/avg_totequY
generate tetaY = avg_totequY/avg_totassetsWY

generate avg_logassetsWY = ln(avg_totassetsWY)
generate avg_logassetsY = ln(avg_totassetsY)
generate avg_liqratioY = avg_liqWY / avg_totassetsWY
label variable avg_liqratioY "Liquid assets to total assets, yearly"

foreach var of varlist avg_stborrWY avg_reposWY { 		// Create interim variables with the missing values set to 0
generate `var'NM = `var'
replace `var'NM = 0 if `var' == .
}
generate avg_stratioY = (avg_stborrWYNM + avg_reposWYNM)/avg_totliabWY
label variable avg_stratioY "Short term liabilities to total liabilities, yearly"

foreach j in div rep tpo {
	by instkey: generate `j'growthY1_zero = `j'growthY1
	by instkey: replace `j'growthY1_zero = 0 if `j'pshY1[_n-1] == 0 & `j'pshY1 == 0 	//Set dividend growth to zero if the firm never pays
	by instkey: replace `j'growthY1_zero = 1 if `j'pshY1[_n-1] == 0 & `j'pshY1 > 0 & `j'pshY1 != .	// Set dividend growth to 1 if the firm initiates a dividend
	
	sort instkey year
	order instkey year `j'pshY1 `j'growthY1 `j'growthY1_zero

	winsor `j'growthY1, generate(helper) p(0.05) highonly
	drop `j'growthY1
	rename helper `j'growthY1
	}

generate neg_divgr = (divgrowthY1 < 0 & divgrowthY1 != .)			// Additional specifications to look at positive vs negative tpo growth
generate neg_X_divgr = neg_divgr * divgrowthY1	
generate pos_divgr = (divgrowthY1 > 0 & divgrowthY1 != .)			// Additional specifications to look at positive vs negative tpo growth
replace neg_divgr = . if divgrowthY1 == .
replace pos_divgr = . if divgrowthY1 == .

generate neg_tpogr = (tpogrowthY1 < 0 & tpogrowthY1 != .)			// Additional specifications to look at positive vs negative tpo growth
generate neg_X_tpogr = neg_tpogr * tpogrowthY1	
generate pos_tpogr = (tpogrowthY1 > 0 & tpogrowthY1 != .)			// Additional specifications to look at positive vs negative tpo growth
replace neg_tpogr = . if tpogrowthY1 == .
replace pos_tpogr = . if tpogrowthY1 == .
	
drop *NM					// Drop all the interim variables with the missing values set to 0


generate pratioY = volume_raw_PY / (volume_raw_PY + volume_raw_SY)
label variable pratioY "Volume purchased divided by total volume (purchased + sold)"
generate sratioY = volume_raw_SY / (volume_raw_PY + volume_raw_SY)
label variable sratioY "Volume sold divided by total volume (purchased + sold)"

foreach var of varlist antY ansY npr_countY npr_volumeY ///
pr_netbuyers_volumeY pr_netsellers_volumeY silenceY net_volumeY volume_PY ///
volumeC_PY volumeTOP_PY volume_SY volumeC_SY volumeTOP_SY pratioY sratioY ///
volume_raw_PY volume_raw_SY value_raw_PY value_raw_SY {

	by instkey: center `var'
	label variable c_`var' "`var', demeaned for firm FE"
	
	by instkey: egen auxAVG_`var' = mean(`var') if year < 2007		// This line produces the average only for observations prior to 2007
	by instkey: egen AVG_`var' = max(auxAVG_`var')					// A new variable that contains the firm-level average for all observations	
	generate x_`var' = `var' - AVG_`var'
	drop auxAVG_`var' /* AVG_`var' */
	label variable x_`var' "`var', subtracting the pre-crisis average"
	}


generate fedfundsrate_X_balance = fedfunds_yavgqend * fedfunds_balance * 1000 / avg_totassetsWY
label variable fedfundsrate_X_balance "Use of the fed funds market: balance at year end times the avg rate"

generate helper = avg_instownershipY
replace avg_instownershipY = 0 if helper == .
drop helper

merge m:1 instkey year using instownSNL.dta, keep(1 3) nogenerate	//Institutional ownership from SNL
erase instownSNL.dta

merge m:1 cusip6 year using instownTR.dta, keepusing(instown_perc instown_hhi) keep(1 3 4 5) update nogenerate		// Institutional ownership from TR
erase instownTR.dta
label variable instown_hhi "Ownership Concentration - Herfindahl-Hirschman Index"
label variable instown_perc "Total Inst. Ownership, Percent of Shares Outstanding"

gen helper = instown_perc							
replace instown_perc = 1 if helper > 1 & helper != .
drop helper

foreach var of varlist instown_hhi instown_perc {
	generate helper = `var'					// Replace missing values with zero
	replace `var' = 0 if helper == .
	drop helper
}

merge 1:1 instkey year using  "`root'\Data\Issuance\issuance_yearly.dta", keep(1 3)
foreach var of varlist grossamtissued_D_nonY grossamtissued_CE_nonY grossamtissued_PE_nonY grossamtissued_D_tarpY grossamtissued_CE_tarpY ///
grossamtissued_PE_tarpY ///
shsissued_D_nonY shsissued_CE_nonY shsissued_PE_nonY shsissued_D_tarpY shsissued_CE_tarpY shsissued_PE_tarpY ///
nrissues_D_nonY nrissues_CE_nonY nrissues_PE_nonY nrissues_D_tarpY nrissues_CE_tarpY nrissues_PE_tarpY {
	replace `var' = 0 if _merge == 1		// If there is no information in the issuance file, issuance that year was zero
}
drop _merge
drop grossamtissued_D_non grossamtissued_CE_non grossamtissued_PE_non grossamtissued_D_tarp grossamtissued_CE_tarp ///
grossamtissued_PE_tarp ///
shsissued_D_non shsissued_CE_non shsissued_PE_non shsissued_D_tarp shsissued_CE_tarp shsissued_PE_tarp ///
nrissues_D_non nrissues_CE_non nrissues_PE_non nrissues_D_tarp nrissues_CE_tarp nrissues_PE_tarp

merge 1:1 cusip6 year using "`root'\Data\Insider\ownership_imputed_final_yearly.dta", keep(1 3) nogenerate
generate pctown_insidersTR_Y = sharesheld_insidersY / commsharesout
label variable pctown_insidersTR_Y "% of shares held by insiders, excluding blockholders - from Thomson Reuters - yearly" 
generate helper = pctown_insidersTR_Y 
replace pctown_insidersTR_Y = . if helper > 1
drop helper

merge 1:1 instkey year using "temp_insider_ownership_SNLyearly.dta", keep(1 3) nogenerate
rename pctown_insidersSNL pctown_insidersSNL_Y
erase temp_insider_ownership_SNLyearly.dta

merge 1:1 instkey year using temp_compensation_to2005_averages.dta, keep(1 3) nogenerate
merge 1:1 instkey year using temp_compensation_to2005_president.dta, keep(1 3) nogenerate
merge 1:1 instkey year using temp_compensation_2006to2013_averages.dta, keep(1 3) nogenerate
merge 1:1 instkey year using temp_compensation_2006to2013_president.dta, keep(1 3) nogenerate
merge 1:1 instkey year using temp_insider_trading_lags_yearly.dta, keep(1 3) nogenerate

erase temp_compensation_to2005_averages.dta
erase temp_compensation_to2005_president.dta
erase temp_compensation_2006to2013_averages.dta
erase temp_compensation_2006to2013_president.dta
erase temp_insider_trading_lags_yearly.dta

foreach var of varlist LantY LansY Lnpr_countY Lnpr_volumeY ///
Lpr_netbuyers_volumeY Lpr_netsellers_volumeY LsilenceY Lnet_volumeY Lvolume_PY ///
LvolumeC_PY LvolumeTOP_PY Lvolume_SY LvolumeC_SY LvolumeTOP_SY LpratioY LsratioY ///
Lvolume_raw_PY Lvolume_raw_SY Lvalue_raw_PY Lvalue_raw_SY  ///
FantY FansY Fnpr_countY Fnpr_volumeY ///
Fpr_netbuyers_volumeY Fpr_netsellers_volumeY FsilenceY Fnet_volumeY Fvolume_PY ///
FvolumeC_PY FvolumeTOP_PY Fvolume_SY FvolumeC_SY FvolumeTOP_SY FpratioY FsratioY ///
Fvolume_raw_PY Fvolume_raw_SY Fvalue_raw_PY Fvalue_raw_SY ///
{

	by instkey: center `var'
	label variable c_`var' "`var', demeaned for firm FE"
	
	by instkey: egen auxAVG_`var' = mean(`var') if year < 2007		// This line produces the average only for observations prior to 2007
	by instkey: egen AVG_`var' = max(auxAVG_`var')					// A new variable that contains the firm-level average for all observations	
	generate x_`var' = `var' - AVG_`var'
	drop auxAVG_`var' 
	label variable x_`var' "`var', subtracting the pre-crisis average"
	}


winsor avg_tpoyieldY, generate(avg_tpoyieldYW) p(0.01)
keep if avg_divyieldWY != .		/

save `finalfile_yearly', replace

display "$S_TIME  $S_DATE"
log close
clear all
exit
